*This is a STATA Do-file to replicate the standard deviations of Table 4 in Feenstra-Li-Yu (2014, ReSTATA)            *
*The code is by Robert Feenstra (UC-Davis), Zhiyuan Li(SHUFE), and Miaojie Yu(Peking Univ.)                                                                        *
*To run the code, please put all datasets in the same directoray in your PC                                                                                        *
*After getting the standard deviations, we store it in a separate spreadsheet called "bootstrapped t-value" to manually calculate the t-values reported in the text*


********** STATA SETUP
clear
drop _all
set memory 31g
set matsize 11000
set more off

capture log close   
log using Table4_bootstrap.out, text replace



****generate blank stack to save outcomes********
g beta1s=0
g beta2s=0
g beta3s=0
g beta4s=0
g beta5s=0
g beta6s=0
g beta7s=0
g beta8s=0
g beta9s=0

g se1s=0
g se2s=0
g se3s=0
g se4s=0
g se5s=0
g se6s=0
g se7s=0
g se8s=0
g se9s=0

g beta1r=0
g beta2r=0
g beta3r=0
g beta4r=0
g beta5r=0
g beta6r=0
g beta7r=0
g beta8r=0
g beta9r=0
g beta10r=0
g beta11r=0
g beta12r=0

g se1r=0
g se2r=0
g se3r=0
g se4r=0
g se5r=0
g se6r=0
g se7r=0
g se8r=0
g se9r=0
g se10r=0
g se11r=0
g se12r=0

save  Table4_Stack, replace

************
* This program runs the panel bootstrapping by randomly drawing firms.
* There are in fact 5 steps to obtain the standard deviations in column 2 of Table 2 and in columns (1)-(2) of Table 3 in the following loop: 
*(1) The pliminary regression of firm TFP (called tfpop) on firm-level indicators, 4-digit industry indicators and ex-ante TFP (called tfpop2), and interactions between 2-digit industry indicators and other variables that appead on the right of Equ. (30) in the text (skipped here)
*(2) The selectin equation (30) in the text using fitted TFP
*(3) The second-step Heckman Equation excluding fitted TFP, used to obtain predicted export share
*(4) The  first-step of the 2SLS estimates, see footnote 26 in the text for details
*(5) The second-step of the 2SLS estimates, see footnote 26 in the text for details

***********************************
forvalues i=1/100 {
***********************************
* This is the dataset that have done the first step (i.e., prelim fit) and hence predicted TFP (tfpp) is already included there****
u Table4_tfpp




****bootstrap*********
bsample, cluster(newid)
disp "iteration # `i'"
**********************


***Step (1): Prelim Fit which is already done in previous step and its outcome variable, predicted TFP (tfpp) is already included in the current dataset**
***Step (2): The Selection Equation with fitted TFP (tfpp)*******************


xi: qui probit FX tfpp   tang_percent  tang_dummy klratio* dyear2-dyear9 i.cic_adj
predict XI


predict PROBITXB, xb
gen PDFPROBIT=(1/sqrt(2*_pi))*exp(-(PROBITXB^2/2))
gen CDFPROBIT = normprob(PROBITXB)
gen IMR_klratio = PDFPROBIT/CDFPROBIT      /* gets the inverse mills ratio*/
qui su IMR_klratio

****Step (3): the second-step Heckman Equation excluding fitted TFP, used to obtain predicted export share, shown as Column (4) of Table 3******************


xi: qui reg expint  tang_percent tang_dummy klratio* IMR_klratio dyear2-dyear9 i.cic_adj
predict expint_p





******************
drop if year>2006
******************

qui su expint if sea==1, d
qui su expint if nosea==1 , d

qui su expint if sea==1   & FX==1, d
qui su expint if nosea==1 & FX==1, d



g expintp_int    =expint_p*int_usd
g expintp_tang   =expint_p*tang_percent
g expintp_int_sea  =expintp_int*sea
g expintp_int_nosea=expintp_int*nosea
g expintp_sea      =expint_p*sea
g expintp_nosea      =expint_p*nosea


*****Generate variables for main estimates*********************
 g       iv            =exp(tfpop2)
 g       iv_expint     =iv*expint
 g       iv_expintsq   =iv*expintsq 
 
 g       iv_expintp    =iv*expint_p
 g       iv_expintpsq  =iv*(expint_p)^2  
 
 g iv_expintp_sea    =iv_expintp*sea
 g iv_expintp_nosea  =iv_expintp*nosea




*************************
*(3-2) two-ivs, ols
g diff=expint-expint_p
qui su diff if FX==1
g diffvar=r(Var)  


qui su expint if FX==1   
g etamean=r(mean)


g expintpsq=(expint_p)^2
g expintsqp=expintpsq/XI*(1+diffvar/(etamean)^2)
qui su expintsqp expintpsq

g expintsqp_int=expintsqp*int_usd

g expintsqp_int_sea   =expintsqp_int*sea
g expintsqp_int_nosea =expintsqp_int*nosea


g iv_expintsqp =iv*(expintsqp)  
g iv_expintsqp_sea=iv_expintsqp*sea
g iv_expintsqp_nosea=iv_expintsqp*nosea
 
g expintsqp_tang =expintsqp*tang_percent



****Table 4, Column (2)***********
qui su expint if FX==1, d

***1-digit industrial fixed effects***
g cic1d=floor(cic2d/10)
xi: qui ivreg2 rev_usd  (int_usd  expintp_int expintsqp_int =iv  iv_expintp iv_expintsqp) expint_p   FX  tang_percent expintp_tang expintsqp_tang tang_dummy i.cic1d    dyear* , robust 

g beta1s=_b[int_usd]
g beta2s=_b[expintp_int]
g beta3s=_b[expintsqp_int]
g beta4s=_b[expint_p]
g beta5s=_b[FX]
g beta6s=_b[tang_percent]
g beta7s=_b[expintp_tang]
g beta8s=_b[expintsqp_tang]
g beta9s=_b[tang_dummy]

g se1s=_se[int_usd]
g se2s=_se[expintp_int]
g se3s=_se[expintsqp_int]
g se4s=_se[expint_p]
g se5s=_se[FX]
g se6s=_se[tang_percent]
g se7s=_se[expintp_tang]
g se8s=_se[expintsqp_tang]
g se9s=_se[tang_dummy]



****************Table 4, Column (3)***
***One digit CIC***
xi i.cic1d
g expintp_Icic1d_2=expint_p*_Icic1d_2
g expintp_Icic1d_3=expint_p*_Icic1d_3
g expintp_Icic1d_4=expint_p*_Icic1d_4

g expintp_dy1=expint_p*dyear1
g expintp_dy2=expint_p*dyear2
g expintp_dy3=expint_p*dyear3
g expintp_dy4=expint_p*dyear4
g expintp_dy5=expint_p*dyear5
g expintp_dy6=expint_p*dyear6

xi: qui ivreg2 rev_usd  (int_usd  expintp_int_sea expintp_int_nosea expintsqp_int_sea expintsqp_int_nosea =iv  iv_expintp_sea iv_expintp_nosea iv_expintsqp_sea iv_expintsqp_nosea) expintp_sea expintp_nosea expintp_Icic1d_2 expintp_Icic1d_3 expintp_Icic1d_4 expintp_dy*   FX  tang_percent expintp_tang expintsqp_tang tang_dummy i.cic1d dyear*  , robust 



g beta1r=_b[int_usd]
g beta2r=_b[expintp_int_sea]
g beta3r=_b[expintsqp_int_sea]
g beta4r=_b[expintp_sea]
g beta5r=_b[FX]
g beta6r=_b[tang_percent]
g beta7r=_b[expintp_tang]
g beta8r=_b[expintsqp_tang]
g beta9r=_b[tang_dummy]
g beta10r=_b[expintp_int_nosea]
g beta11r=_b[expintsqp_int_nosea]
g beta12r=_b[expintp_nosea]

g se1r=_se[int_usd]
g se2r=_se[expintp_int_sea]
g se3r=_se[expintsqp_int_sea]
g se4r=_se[expintp_sea]
g se5r=_se[FX]
g se6r=_se[tang_percent]
g se7r=_se[expintp_tang]
g se8r=_se[expintsqp_tang]
g se9r=_se[tang_dummy]
g se10r=_se[expintp_int_nosea]
g se11r=_se[expintsqp_int_nosea]
g se12r=_se[expintp_nosea]

duplicates drop beta1r, force
keep beta* se*
  append using Table4_Stack
save Table4_Stack, replace
}
su

********** CLOSE OUTPUT
drop _all
log close